# Setup: attach packages, set global plotting themes, and register the
# cmdstan knitr engine so ```{cmdstan} chunks compile Stan programs.
library(tidyverse)
theme_set(theme_bw(base_size = 14))
# Reversed hue palette: second default hue for "Pos", first for "Neg",
# so the two affects keep stable colors across all plots below.
pos_neg_color <- rev(scales::hue_pal()(2))
library(lubridate)
library(tsibble)
library(cmdstanr)
register_knitr_engine(override = TRUE)
library(posterior)
library(bayesplot)
color_scheme_set("red")
bayesplot_theme_set(theme_bw(base_size = 14))
# library(loo)
# Provides helpers used later, e.g. geom_split_violin() -- TODO confirm.
source("custom_functions.R")

# State Space Model Analysis 2 ----
# 1 Load Data and Setting ----
# Read the raw ESM data and derive per-subject day / beep indices, then
# reshape to long format (one row per subject x affect x beep) and convert
# to a tsibble keyed by (Subject, Affect) and indexed by an artificial
# DateTime so all subjects share a common "Day 1..7" axis.
rawdata <- read_tsv("data_1beep_no1st beep_annette.tsv")
data <- rawdata %>%
  mutate(Pos = PA,
         Neg = `NA`,  # `NA` is the negative-affect column name in the raw file
         Date_Time = ymd_hms(str_glue("{year}-{month}-{day} {hour}:{min}:{sec}"), tz = "CET"),
         Date = as_date(Date_Time),
         Time = hms::as_hms(Date_Time),
         WDay = wday(Date, label = TRUE),
         # New subject whenever the participant ID changes between rows.
         Subject = factor(cumsum(PpID != lag(PpID, default = 0))),
         .keep = "none") %>%
  group_by(Subject) %>%
  # Day counter within subject: increments whenever the calendar date changes.
  mutate(Day = factor(cumsum(Date != lag(Date, default = origin)))) %>%
  group_by(Subject, Date) %>%
  mutate(Moment = factor(1:n())) %>%  # beep index within each subject-day
  ungroup() %>%
  pivot_longer(cols = c(Pos, Neg), names_to = "Affect", values_to = "Score") %>%
  # Artificial DateTime: the within-subject day number mapped onto dates
  # counted from the epoch, combined with the observed clock time.
  mutate(DateTime = as_datetime(ymd_hms(paste(as_date(as.double(Day)),
                                              as.character(Time))))) %>%
  as_tsibble(key = c(Subject, Affect),
             index = DateTime)
rmarkdown::paged_table(data)

# 2 State-Space Model ----
2.1 Model Specification
Following the Measurement-Error Vector Autoregressive model of order 1 (MEVAR(1)) of Schuurman & Hamaker (2019), the model can be written as follows. Here, however, I use it for only a single variable at a time, such as positive affect or negative affect.
- Level 1 model (within subject)
- Observation equation
\[ \begin{align*} y_{it} &= \mu_i + \theta_{it} + \epsilon_{it} \\ \epsilon_{it} &\sim \mathcal{N}(0, \sigma_{\epsilon i}^2) \end{align*} \]
- State equation
\[ \begin{align*} \theta_{it} &= \phi_i \theta_{i t-1} + \omega_{it} \\ \omega_{it} &\sim \mathcal{N}(0, \sigma_{\omega i}^2) \end{align*} \]
- Level 2 model (between subject)
\[ \begin{align*} \mu_i &\sim \mathcal{N}(\gamma_\mu, \psi_\mu^2) \\ \phi_i &\sim \mathcal{N}(\gamma_\phi, \psi_\phi^2) \\ \sigma_{\epsilon i} &\sim \text{Inverse-Gamma}(\alpha_{\sigma \epsilon}, \beta_{\sigma \epsilon}) \\ \sigma_{\omega i} &\sim \text{Inverse-Gamma}(\alpha_{\sigma \omega}, \beta_{\sigma \omega}) \end{align*} \]
where \(i\) indexes subjects and \(t\) indexes measurement occasions; \(\mu_i\) is the person-specific mean, \(\theta_{it}\) the latent (mean-centered) state, \(\phi_i\) the autoregressive parameter, \(\sigma_{\epsilon i}\) the measurement-error standard deviation, and \(\sigma_{\omega i}\) the innovation standard deviation.
2.2 Reliability
2.3 Bayesian Analysis
multilevel-ssm-single.stan
// Multilevel state-space (measurement-error AR(1)) model for one affect
// variable, following Schuurman & Hamaker (2019). Subjects have ragged
// series of length T[n], stored padded to a common length max_T.
#include ssm-function.stan
data {
  int<lower=1> N; // number of subjects
  array[N] int<lower=1> T; // number of observations for each subject
  int<lower=1> max_T; // maximum number of observations
  array[N] vector[max_T] y; // observations; entries beyond T[n] are padding and never read
}
transformed data {
  array[N] real m_0; // prior mean of the initial state
  array[N] real<lower=0> c_0; // prior sd of the initial state (diffuse)
  m_0 = rep_array(0.0, N);
  c_0 = rep_array(1000, N);
}
parameters {
  array[N] real mu; // person-specific mean (trait level)
  array[N] real theta_0; // initial latent state
  array[N] vector[max_T] theta; // latent states (padded; see model block)
  array[N] real phi; // autoregressive parameters
  array[N] real<lower=0> sigma_epsilon; // sd of the measurement error
  array[N] real<lower=0> sigma_omega; // sd of the innovation noise
  real gamma_mu; // population mean of the person means
  real<lower=0> psi_mu; // population sd of the person means
  real gamma_phi; // population mean of the autoregressive parameters
  real<lower=0> psi_phi; // population sd of the autoregressive parameters
  real<lower=0> alpha_sigma_epsilon; // inverse-gamma shape for sigma_epsilon
  real<lower=0> beta_sigma_epsilon; // inverse-gamma scale for sigma_epsilon
  real<lower=0> alpha_sigma_omega; // inverse-gamma shape for sigma_omega
  real<lower=0> beta_sigma_omega; // inverse-gamma scale for sigma_omega
}
transformed parameters {
  // Mean and variance of the inverse-gamma level-2 distributions
  // (finite only for alpha > 1 and alpha > 2, respectively).
  real mu_sigma_epsilon;
  real var_sigma_epsilon;
  real mu_sigma_omega;
  real var_sigma_omega;
  mu_sigma_epsilon = beta_sigma_epsilon / (alpha_sigma_epsilon - 1);
  var_sigma_epsilon = mu_sigma_epsilon^2 / (alpha_sigma_epsilon - 2);
  mu_sigma_omega = beta_sigma_omega / (alpha_sigma_omega - 1);
  var_sigma_omega = mu_sigma_omega^2 / (alpha_sigma_omega - 2);
}
model {
  // level 1 (within subject)
  for (n in 1:N) {
    // when t = 0
    theta_0[n] ~ normal(m_0[n], c_0[n]);
    // when t = 1
    theta[n][1] ~ normal(phi[n] * theta_0[n], sigma_omega[n]);
    y[n][1] ~ normal(mu[n] + theta[n][1], sigma_epsilon[n]);
    // when t = 2, ..., T[n]
    for (t in 2:T[n]) {
      theta[n][t] ~ normal(phi[n] * theta[n][t - 1], sigma_omega[n]);
      y[n][t] ~ normal(mu[n] + theta[n][t], sigma_epsilon[n]);
    }
    // Padding: latent states beyond T[n] correspond to no data. Without a
    // prior they are improper-flat, which makes the posterior improper and
    // drives divergences / treedepth saturation. Give them a proper prior;
    // their draws are meaningless and must not be interpreted.
    for (t in (T[n] + 1):max_T) {
      theta[n][t] ~ std_normal();
    }
  }
  // level 2 (between subject)
  for (n in 1:N) {
    mu[n] ~ normal(gamma_mu, psi_mu);
    phi[n] ~ normal(gamma_phi, psi_phi);
    sigma_epsilon[n] ~ inv_gamma(alpha_sigma_epsilon, beta_sigma_epsilon);
    sigma_omega[n] ~ inv_gamma(alpha_sigma_omega, beta_sigma_omega);
  }
  // the (hyper)priors of parameters are set as the Stan default values
}
generated quantities {
  array[N] vector[max_T] y_hat; // fitted values; entries beyond T[n] stay NaN
  array[N] real tau2; // stationary variance of the latent AR(1) process
  array[N] real rel_W; // within-subject reliability
  real rel_B; // between-subject reliability
  for (n in 1:N) {
    // prediction
    for (t in 1:T[n]) {
      y_hat[n][t] = mu[n] + theta[n][t];
    }
    // within-subject reliability
    tau2[n] = sigma_omega[n]^2 / (1 - phi[n]^2);
    rel_W[n] = tau2[n] / (tau2[n] + sigma_epsilon[n]^2);
  }
  // between-subject reliability
  rel_B = psi_mu^2 / (psi_mu^2 + mean(tau2) + mu_sigma_epsilon^2);
}
# Assemble the positive-affect data for Stan: one nested data frame per
# subject, plus a copy padded with Inf to a common length max_T (the model
# only loops over 1:T[n], so padded values are never read).
data_list_PA <- tibble(data) %>%
  group_by(Subject) %>%
  pivot_wider(names_from = Affect, values_from = Score) %>%
  select(Pos) %>%
  drop_na(Pos) %>%
  nest() %>% ungroup() %>%
  mutate(`T` = map_dbl(data, nrow),
         max_T = max(`T`),
         data_padding = pmap(list(data, `T`, max_T),
                             \(x, y, z) {
                               c(x$Pos, rep(Inf, z - y))
                             }))
mssm1_PA <- cmdstan_model("stan/multilevel-ssm-single.stan")
mssm1_PA_data <- lst(N = nrow(data_list_PA),
                     # map_int() gives the typed integer vector Stan expects,
                     # not a list as map() would.
                     `T` = map_int(data_list_PA$data, nrow),
                     max_T = max(data_list_PA$`T`),
                     #P = 2,
                     y = data_list_PA$data_padding)
mssm1_PA_fit <- mssm1_PA$sample(data = mssm1_PA_data,
                                seed = 20240222,
                                chains = 8,
                                parallel_chains = 8,
                                iter_sampling = 1500,
                                refresh = 1000,
                                show_messages = FALSE)
mssm1_PA_fit$save_object(file = "stan/multilevel-ssm-PA-fit.RDS")

# Sampler console output (kept for the record):
# All 8 chains finished successfully. Mean chain execution time: 6346.0
# seconds. Total execution time: 6404.8 seconds.
# Warning: 322 of 12000 (3.0%) transitions ended with a divergence.
# Warning: 11678 of 12000 (97.0%) transitions hit the maximum treedepth
# limit of 10. See https://mc-stan.org/misc/warnings for details.
# Same preparation and fit for negative affect.
data_list_NA <- tibble(data) %>%
  group_by(Subject) %>%
  pivot_wider(names_from = Affect, values_from = Score) %>%
  select(Neg) %>%
  drop_na(Neg) %>%
  nest() %>% ungroup() %>%
  mutate(`T` = map_dbl(data, nrow),
         max_T = max(`T`),
         data_padding = pmap(list(data, `T`, max_T),
                             \(x, y, z) {
                               c(x$Neg, rep(Inf, z - y))
                             }))
mssm1_NA <- cmdstan_model("stan/multilevel-ssm-single.stan")
mssm1_NA_data <- lst(N = nrow(data_list_NA),
                     `T` = map_int(data_list_NA$data, nrow),
                     max_T = max(data_list_NA$`T`),
                     #P = 2,
                     y = data_list_NA$data_padding)
mssm1_NA_fit <- mssm1_NA$sample(data = mssm1_NA_data,
                                seed = 20240222,
                                chains = 8,
                                parallel_chains = 8,
                                iter_sampling = 1500,
                                refresh = 1000,
                                show_messages = TRUE)
mssm1_NA_fit$save_object(file = "stan/multilevel-ssm-NA-fit.RDS")

# Reload the saved fit so the document can be re-rendered without re-sampling.
mssm1_PA_fit <- readRDS("stan/multilevel-ssm-PA-fit.RDS")
mssm1_NA_fit <- readRDS("stan/multilevel-ssm-NA-fit.RDS")

# Posterior summaries of the fitted values y_hat[n, t]. Subject and time
# indices are parsed back out of the Stan variable names ("y_hat[n,t]").
y_hat_PA_summary <- mssm1_PA_fit$summary("y_hat", mean, median, quantile2) %>%
  mutate(Indices = str_extract_all(variable, "\\d+"),
         Subject = map_dbl(Indices, \(x) as.integer(x[1])) %>%
           factor(levels = levels(data$Subject)),
         Affect = "Pos",
         Time_Index = map_dbl(Indices, \(x) as.integer(x[2])))
y_hat_NA_summary <- mssm1_NA_fit$summary("y_hat", mean, median, quantile2) %>%
  mutate(Indices = str_extract_all(variable, "\\d+"),
         Subject = map_dbl(Indices, \(x) as.integer(x[1])) %>%
           factor(levels = levels(data$Subject)),
         Affect = "Neg",
         Time_Index = map_dbl(Indices, \(x) as.integer(x[2])))
# Attach posterior predictions to the observed data. The PA and NA summaries
# share column names (mean, median, q5, q95, ...), so they must be stacked
# and joined ONCE with an explicit key: two successive natural left_joins
# would treat the summary columns added by the first join as join keys for
# the second, leaving every "Neg" row without predictions.
data_predict <- data %>%
  pivot_wider(names_from = Affect, values_from = Score) %>%
  select(Pos, Neg) %>%
  drop_na(Pos, Neg) %>%
  group_by(Subject) %>%
  mutate(Time_Index = 1:n()) %>%
  ungroup() %>%
  pivot_longer(c("Pos", "Neg"), names_to = "Affect", values_to = "Score") %>%
  left_join(bind_rows(y_hat_PA_summary, y_hat_NA_summary),
            by = c("Subject", "Affect", "Time_Index"))
# Plot observed scores with the posterior mean (dashed) and 90% interval
# ribbon of the fitted values, ten subjects per page.
for (i in 1:10) {
  g <- data_predict %>%
    filter(Subject %in% (10 * (i - 1) + 1):(10 * i)) %>%
    ggplot(aes(x = DateTime, y = Score)) +
    geom_line(aes(color = Affect)) +
    geom_point(aes(color = Affect)) +
    scale_color_manual(values = pos_neg_color) +
    geom_line(aes(y = mean, group = Affect), linetype = "dashed") +
    geom_ribbon(aes(ymin = q5, ymax = q95, group = Affect), alpha = 0.25) +
    # Reference lines at the scale bounds of the affect scores.
    geom_hline(yintercept = c(0, 100), color = "forestgreen") +
    scale_y_continuous(limits = c(-20, 120)) +
    # DateTime counts days from the epoch, so day k sits at k * 86400 s.
    scale_x_datetime(breaks = as_datetime(1:7 * 86400),
                     labels = paste("Day", 1:7),
                     limits = as_datetime(c(1, 8) * 86400)) +
    facet_grid(Subject ~ .)
  ggsave(filename = str_glue("plots/mssm1/predict_{from}-{to}.png",
                             from = 10 * (i - 1) + 1, to = 10 * i),
         plot = g, width = 7, height = 14)
}

# Trace plots of the autoregressive parameters for the first six subjects.
mssm1_PA_fit$draws(variables = "phi") %>%
  `[`(, , 1:6) %>%
  mcmc_trace()

# Combine per-subject within-person reliability draws from both fits;
# after the join, ".x" suffixes mark the PA columns, ".y" the NA columns.
rel_W_draws <- mssm1_PA_fit$draws(variables = "rel_W", format = "df") %>%
  left_join(mssm1_NA_fit$draws(variables = "rel_W", format = "df"),
            by = c(".chain", ".iteration", ".draw")) %>%
  select(-.chain, -.iteration, -.draw) %>%
  pivot_longer(cols = everything(),
               names_to = "variable", values_to = "value") %>%
  mutate(Subject = str_extract(variable, "\\d+") %>%
           factor(levels = levels(data$Subject)),
         Affect = str_detect(variable, "\\.x") %>% ifelse("Pos", "Neg"))
# Split-violin plot of within-person reliability per subject, PA vs NA.
g_rel_W <- ggplot(rel_W_draws, aes(x = 1, y = value, fill = Affect)) +
  geom_split_violin() +  # provided by custom_functions.R -- TODO confirm
  scale_x_continuous(name = NULL, labels = NULL, breaks = NULL) +
  #scale_y_continuous(limits = c(0, 1)) +
  scale_fill_manual(values = pos_neg_color) +
  facet_wrap(~ Subject, ncol = 10, scales = "free_y")
ggsave("plots/rel-W-PA-NA-separatelly.png", g_rel_W, width = 14, height = 14)

# Between-person reliability draws from the PA fit.
rel_B_PA_draws <- mssm1_PA_fit$draws(variables = "rel_B", format = "df") %>%
  rename(rel_B_PA = rel_B)
rel_B_NA_draws <- mssm1_NA_fit$draws(variables = "rel_B", format = "df") %>%
  rename(rel_B_NA = rel_B)
# Natural join on the shared .chain/.iteration/.draw metadata columns.
rel_B_draws <- left_join(rel_B_PA_draws, rel_B_NA_draws)
mcmc_trace(rel_B_draws)

# Posterior densities of between-person reliability and the PA - NA difference.
mcmc_areas(rel_B_draws %>% mutate(rel_B_diff = rel_B_PA - rel_B_NA),
           prob = 0.8,
           prob_outer = 0.99) +
  coord_cartesian(xlim = c(-1, 1))